import xlrd
import os
import os.path
import sys
import django

# Prepare the environment for the Django model import further down: the
# current working directory is appended to sys.path and the settings module
# name is placed in the environment.
# NOTE(review): presumably the script is meant to be launched from the
# directory that contains the `mitagger` package -- confirm.
pro_dir = os.getcwd()
sys.path.append(pro_dir)
# setdefault leaves an already exported DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mitagger.settings")

from home.models import Image

def getExcelDict(basePath):
    """Map each first-column value found in the spreadsheets to its workbook path.

    Walks basePath recursively. For every file whose name ends in 'xlsx' the
    first sheet is opened and, for each row after row 0, the value in column 0
    becomes a key whose value is the path of the workbook it was read from.
    When the same key appears in several workbooks, the one visited last wins.

    Args:
        basePath: root directory to search.

    Returns:
        dict of {column-0 value: workbook path}; empty when no spreadsheet
        is found.
    """
    excelDict = dict()
    for parentDir, dirnames, filenames in os.walk(basePath):
        for filename in filenames:
            if not filename.endswith('xlsx'):
                continue
            fullFilename = (parentDir if parentDir.endswith('/') else parentDir + '/') + filename
            medical_table = xlrd.open_workbook(fullFilename).sheets()[0]
            # Row 0 is skipped -- presumably a header row; confirm against the data.
            for i in range(1, medical_table.nrows):
                # Fixed: the sheet is bound to `medical_table`; the previous
                # code read from an undefined name and raised NameError.
                excelDict[medical_table.cell(i, 0).value] = fullFilename
    return excelDict

def getImagePathDict(basePath):
    """Index the TIFF files below basePath by bare file name.

    Walks basePath recursively and records every file whose lower-cased name
    ends in 'tif'. When two directories contain a file with the same name,
    the one visited last wins.

    Args:
        basePath: root directory to search.

    Returns:
        dict of {file name: path built from the walked directory, '/' and
        the file name}; empty when nothing matches.
    """
    pathDict = dict()
    for parentDir, dirnames, filenames in os.walk(basePath):
        for filename in filenames:
            if not filename.lower().endswith('tif'):
                continue
            fullFilename = (parentDir if parentDir.endswith('/') else parentDir + '/') + filename
            pathDict[filename] = fullFilename
    # Fixed: the mapping was built but never returned, so callers got None.
    return pathDict

def checkIsImageInDB(filename):
    """Return True when an Image row with file_name == filename already exists."""
    # exists() asks the database only whether a match is present, instead of
    # loading every matching row just to count them with len().
    return Image.objects.filter(file_name=filename).exists()

def mainFunc(basePath, imageBasePath='uploads'):
    """Import spreadsheet rows found under basePath as Image records.

    Walks basePath recursively. For every file whose name ends in 'xlsx' the
    first sheet is read and each row after row 0 is stored as a new Image,
    unless checkIsImageInDB reports that the file name is already present.

    Columns read from each row (0-based): 0 file name, 1 label, 2 title,
    3 legend, 5 relevant article DOI, 9 DOI link.

    Progress and failures are printed to stdout. A row whose image file is
    not found is still saved, without a location; a row that fails to save
    is reported and skipped, and the import carries on.

    Args:
        basePath: directory tree searched for spreadsheets.
        imageBasePath: directory tree searched for the image files the rows
            refer to. Defaults to 'uploads', the value that was previously
            hard-coded.
    """
    print('get path dict')
    # getImagePathDict may hand back None; fall back to an empty mapping so a
    # lookup below is reported as a missing location instead of raising.
    pathDict = getImagePathDict(imageBasePath) or {}
    totalImages = 0
    for parentDir, dirnames, filenames in os.walk(basePath):
        for filename in filenames:
            if not filename.endswith('xlsx'):
                continue
            print('processing xlsx file %s' % filename)
            fullFilename = (parentDir if parentDir.endswith('/') else parentDir + '/') + filename
            medical_table = xlrd.open_workbook(fullFilename).sheets()[0]
            # Row 0 is skipped -- presumably a header row; confirm against the data.
            for i in range(1, medical_table.nrows):
                imageFilename = medical_table.cell(i, 0).value
                if checkIsImageInDB(imageFilename):
                    continue
                print('File %s processing' % imageFilename)
                newImage = Image(file_name=imageFilename)
                # Fixed: cell() returns an xlrd Cell wrapper; the model fields
                # need the payload held in .value, as already done for column 0.
                newImage.title = medical_table.cell(i, 2).value
                newImage.legend = medical_table.cell(i, 3).value
                newImage.doi_link = medical_table.cell(i, 9).value
                newImage.label = medical_table.cell(i, 1).value
                newImage.relevant_articleDOI = medical_table.cell(i, 5).value
                location = pathDict.get(imageFilename)
                if location is None:
                    print('File %s location can not found' % imageFilename)
                else:
                    newImage.image_file_location = location
                try:
                    newImage.save()
                except Exception as exc:
                    # Best effort: report the row and keep importing, but no
                    # longer swallow KeyboardInterrupt/SystemExit or hide the cause.
                    print('File %s not saved' % imageFilename)
                    print('  reason: %r' % (exc,))
                else:
                    totalImages += 1
    print('Total imported image: %d' % totalImages)

if __name__ == '__main__':
    # Only when executed as a script: initialise Django, then import the
    # spreadsheets found under the hard-coded directory below.
    # NOTE(review): `home.models` is imported at module level, i.e. before
    # django.setup() is reached here; newer Django versions reject model
    # imports made before setup -- confirm against the Django version in use.
    django.setup()
    mainFunc('images/xlsx/xlsx/')
